# Preparations ------------------------------------------------------------
rm(list=ls())
library(tidyverse);library(viridis);library(wesanderson);library(pals);library(readtext);library(stringi);library(ggpubr);library(gt);library(xtable);library(readxl);library(haven)


# Read data ---------------------------------------------------------------
# Load the five serialized OMG objects from Data/TheOMGDataset/R/.
cy_omg       <- readr::read_rds("Data/TheOMGDataset/R/cy_omg.rds")
omg_cy       <- readr::read_rds("Data/TheOMGDataset/R/omg_cy.rds")
omg_my       <- readr::read_rds("Data/TheOMGDataset/R/omg_my.rds")
omg          <- readr::read_rds("Data/TheOMGDataset/R/omg.rds")
omg_coverage <- readr::read_rds("Data/TheOMGDataset/R/omg_coverage.rds")


## Add region --------------------------------------------------------------
# Attach the region code (e_regionpol_6C) found in omg_cy to every campaign,
# matching on the columns the two tables share. A left join is used so that a
# country that only occurs in omg_cy cannot append an empty, campaign-less row
# to omg (a full join would, and such rows would later be counted as campaigns).
# NOTE(review): if a country carries more than one non-missing region code in
# omg_cy, its campaigns are duplicated by this join -- confirm the code is
# constant within country.
omg <- left_join(omg, na.omit(unique(omg_cy[, c("country_id", "e_regionpol_6C")])))

# Campaigns still lacking a region code are assigned code 3; the original
# author's note attributes these to Western Sahara.
omg$e_regionpol_6C[is.na(omg$e_regionpol_6C)] <- 3  #Western Sahara

# Human-readable region names (line breaks are for plot labels).
omg$region <- case_match(omg$e_regionpol_6C,
                         1 ~ "Eastern Europe and\nCentral Asia",
                         2 ~ "Latin America and\nCaribbean",
                         3 ~ "Middle East and\nNorth Africa",
                         4 ~ "Sub-Saharan Africa",
                         5 ~ "Western Europe and\nNorth America",
                         6 ~ "Asia and Pacific")



# Make filter for whenever we want to remove pro-regime campaigns --------
# opposition_filter is 1 when demand_regime_anti or demand_gov_anti equals 1,
# 0 when neither does, and NA when the deciding indicator is missing
# (demand_regime_anti is checked first, demand_gov_anti only if that is not 1).
omg$opposition_filter <- case_when(
  is.na(omg$demand_regime_anti) ~ NA_real_,
  omg$demand_regime_anti == 1   ~ 1,
  is.na(omg$demand_gov_anti)    ~ NA_real_,
  omg$demand_gov_anti == 1      ~ 1,
  .default = 0
)




# All opposition campaigns-frame ------------------------------------------------
# Campaigns per start year for rows with opposition_filter == 0, padded so that
# every year 1787-2019 is present (years without campaigns count as 0), plus the
# running total of campaigns.
all_campaigns_trend <- omg %>%
  dplyr::filter(opposition_filter == 0) %>%
  group_by(start_year) %>%
  summarise(campaigns = n()) %>%
  full_join(tibble(start_year = 1787:2019), by = "start_year") %>%
  mutate(campaigns = coalesce(as.numeric(campaigns), 0)) %>%
  arrange(start_year) %>%
  mutate(cumulative_campaigns = cumsum(campaigns))

# Temporary columns holding the counts of the nine preceding rows (years).
all_campaigns_trend <- purrr::reduce(
  1:9,
  function(dat, k) mutate(dat, "campaigns_lag{k}" := lag(campaigns, k)),
  .init = all_campaigns_trend
)

# Ten-year window: current year + nine lags; lags before the first row are NA
# and are ignored in the sum.
all_campaigns_trend$campaigns_10year_sum <- all_campaigns_trend %>%
  select(starts_with("campaigns")) %>%
  rowSums(na.rm = TRUE)

# The lag columns were only needed for the window sum.
all_campaigns_trend <- all_campaigns_trend %>% select(-contains("_lag"))


# All campaigns including pro-regime-frame --------------------------------------
# Same yearly frame as above but without filtering on opposition_filter:
# campaigns per start year, zero-filled for 1787-2019, with a running total.
all_campaigns_trend_incl_pro <- omg %>%
  group_by(start_year) %>%
  summarise(campaigns = n()) %>%
  full_join(tibble(start_year = 1787:2019), by = "start_year") %>%
  mutate(campaigns = coalesce(as.numeric(campaigns), 0)) %>%
  arrange(start_year) %>%
  mutate(cumulative_campaigns = cumsum(campaigns))

# Add lag columns 1-9 one at a time; they exist only to build the window sum.
all_campaigns_trend_incl_pro <- purrr::reduce(
  1:9,
  function(dat, k) mutate(dat, "campaigns_lag{k}" := lag(campaigns, k)),
  .init = all_campaigns_trend_incl_pro
)

# Sum of the current year and the nine preceding rows (missing lags ignored).
all_campaigns_trend_incl_pro$campaigns_10year_sum <- all_campaigns_trend_incl_pro %>%
  select(starts_with("campaigns")) %>%
  rowSums(na.rm = TRUE)

all_campaigns_trend_incl_pro <- all_campaigns_trend_incl_pro %>% select(-contains("_lag"))


# All campaigns by region incl pro-regime-frame ---------------------------
# Campaigns per region and start year (no opposition filter), zero-filled so
# that every region has a row for each year 1787-2019, with a per-region running
# total and 10- and 20-year rolling sums.
all_campaigns_region_trend <- omg %>%
  group_by(region, start_year) %>%
  summarise(campaigns = n(), .groups = "drop_last")

# Pad with every region x year combination; new rows get campaigns = NA.
all_campaigns_region_trend <- full_join(
  all_campaigns_region_trend,
  expand.grid("start_year" = 1787:2019,
              "region" = unique(all_campaigns_region_trend$region),
              stringsAsFactors = FALSE),
  by = c("region", "start_year")
)
all_campaigns_region_trend$campaigns[is.na(all_campaigns_region_trend$campaigns)] <- 0

all_campaigns_region_trend <- all_campaigns_region_trend %>%
  arrange(region, start_year) %>%
  group_by(region) %>%
  mutate(cumulative_campaigns = cumsum(campaigns))

# Temporary within-region lag columns 1-19 (the data stays grouped by region,
# so lags never reach across regions).
all_campaigns_region_trend <- purrr::reduce(
  1:19,
  function(dat, k) mutate(dat, "campaigns_lag{k}" := lag(campaigns, k)),
  .init = all_campaigns_region_trend
)

# Rolling sums over explicitly named columns. The window columns are listed by
# name rather than matched with "^campaigns": that pattern also matches
# campaigns_10year_sum, which previously got added into the 20-year sum and
# double-counted the most recent ten years.
all_campaigns_region_trend$campaigns_10year_sum <- rowSums(
  all_campaigns_region_trend[, c("campaigns", paste0("campaigns_lag", 1:9))],
  na.rm = TRUE
)
all_campaigns_region_trend$campaigns_20year_sum <- rowSums(
  all_campaigns_region_trend[, c("campaigns", paste0("campaigns_lag", 1:19))],
  na.rm = TRUE
)

# Drop the helper lag columns.
all_campaigns_region_trend[, grep("_lag", colnames(all_campaigns_region_trend), value = TRUE)] <- NULL

####      Colorpalette?     #####
# Vector of 25 colours (R colour names and hex codes).
c25 <- c(
  "dodgerblue2",
  "#E31A1C",      # red
  "green4",
  "#6A3D9A",      # purple
  "#FF7F00",      # orange
  "black",
  "brown",
  "skyblue2",
  "#FB9A99",      # lt pink
  "palegreen2",
  "#CAB2D6",      # lt purple
  "#FDBF6F",      # lt orange
  "gray70",
  "khaki2",
  "maroon",
  "orchid1",
  "deeppink1",
  "blue1",
  "steelblue4",
  "darkturquoise",
  "green1",
  "yellow4",
  "yellow3",
  "darkorange4",
  "gold1"
)


# Descriptive stats -------------------------------------------------------
# Long table (one row per campaign x numeric variable) used for the
# descriptive-statistics tables.
descdat <- omg
# seq_len() instead of 1:nrow(): stays empty for an empty table instead of
# producing c(1, 0).
descdat$campaign_no <- seq_len(nrow(descdat))

# Keep numeric columns only and stack them into Variable / Value pairs.
descdat <- descdat %>%
  select(where(is.numeric)) %>%
  pivot_longer(cols = -campaign_no, names_to = "Variable", values_to = "Value")

#Remove dummy-versions
# i.e. peak_* variables whose name ends in a digit, and the helper filter
# created earlier in this script.
descdat <- descdat[!grepl("peak_.*[0-9]$", descdat$Variable) &
                     descdat$Variable != "opposition_filter", ]

#Make variable-group for a more aesthetic figure
# The first matching pattern decides the group; the order of the rules
# therefore matters. Anything unmatched is tagged "NotShow".
descdat <- descdat %>%
  mutate(vargroup = case_when(
    grepl("^nv_|^v_|^strategy_camp", Variable) ~ "Violence strategy",
    grepl("atleast_", Variable)                ~ "Social group participation",
    grepl("dominate", Variable)                ~ "Social group dominate",
    grepl("originate", Variable)               ~ "Social group originate",
    grepl("org_", Variable)                    ~ "Org. participation",
    grepl("demand_", Variable)                 ~ "Demand",
    grepl("rel_", Variable)                    ~ "Religion",
    grepl("end_|start_|date_", Variable)       ~ "Time",
    grepl("coord_leaders", Variable)           ~ "Other",
    grepl("^peak_", Variable)                  ~ "Other",
    grepl("size_without_petition", Variable)   ~ "Other",
    grepl("ide_", Variable)                    ~ "Ideology",
    .default = "NotShow"
  ))


# One row of summary statistics per variable (missing values ignored).
# TRUE is spelled out instead of the reassignable shorthand T, and the grouping
# of the result is stated explicitly: "drop_last" is what summarise() did by
# default here (result stays grouped by vargroup), now without the message.
descdat <- descdat %>%
  group_by(vargroup, Variable) %>%
  summarise(
    "Min"      = min(Value, na.rm = TRUE),
    "1st qty." = quantile(Value, 0.25, na.rm = TRUE),
    "Median"   = median(Value, na.rm = TRUE),
    "3rd qty." = quantile(Value, 0.75, na.rm = TRUE),
    "Mean"     = round(mean(Value, na.rm = TRUE), 2),
    "Max"      = max(Value, na.rm = TRUE),
    "SD"       = round(sd(Value, na.rm = TRUE), 2),
    .groups = "drop_last"
  )

# Strip the variable-family prefixes one after another (in this order), then
# turn the remaining underscores into spaces.
descdat$Variable <- gsub(
  "_", " ",
  Reduce(
    function(nm, prefix) gsub(prefix, "", nm),
    c("rel_", "atleast_", "dominate_", "originate_", "^org_",
      "demand_", "^ide_", "^nv_", "^v_"),
    descdat$Variable
  )
)


#Remove external variables
# Names are compared after the clean-up above, i.e. with spaces.
descdat <- descdat[!descdat$Variable %in% c("e regionpol 6C", "region", "Team no", "change note"), ]


#Variable names
# Build the display label for each variable: sentence-case the cleaned name,
# expand abbreviations, apply the exact-match lookup, rewrite "... anti" names
# as "Anti-...", and finally fix the ordering of the labels via factor levels.
descdat$Variable_label <- local({
  # Pattern-based rewrites, applied in sequence.
  lbl <- Reduce(
    function(x, rule) gsub(rule[1], rule[2], x),
    list(
      c("^Id", "ID"),
      c(" id", " ID"),
      c("[Cc]code", "CCODE"),
      c("prec$", "precision"),
      c("Civilrights", "Civil rights"),
      c("[Ss]ocialgroup", "Social group"),
      c("Polparty", "Political party"),
      c("^Main ", "Main: ")
    ),
    str_to_sentence(descdat$Variable)
  )

  # Exact-match replacements; labels not listed here are left unchanged.
  lookup <- c(
    "Demo" = "Democracy",
    "Coord leaders" = "Coordinated leadership",
    "Weapon acq" = "Weapon acquisition",
    "Indwork" = "Industrial workers",
    "Nonindurban" = "Non-industrial urban workers",
    "Pubemp" = "Public employees",
    "Business" = "Business elites",
    "Agrarianelites" = "Agrarian elites",
    "Urb middle class" = "Urban middle class",
    "Relethnic" = "Religious or ethnic",
    "Milemp" = "Military employees",
    "Laborunion" = "Labor union",
    "Militaryvet" = "Military veterans",
    "Womens" = "Women",
    "Militarygovt" = "Regime security forces",
    "Weapon train" = "Weapon training",
    "Civilsociety other" = "Civil society, other",
    "Democracy hr" = "Democracy and HR",
    "Rural" = "Rural workers",
    "Pride antipride campaign" = "Pride-antipride campaign",
    "Strategy camp" = "Campaign strategy",
    "Civil rights anti" = "Institutional: Anti-civil rights",
    "Civil rights" = "Institutional: Civil rights",
    "Election anti" = "Institutional: Anti-Electoral changes",
    "Election" = "Institutional: Electoral changes",
    "Executive anti" = "Institutional: Anti-constrain executive",
    "Executive" = "Institutional: Constrain executive",
    "Free expression anti" = "Institutional: Anti-freedom of expression",
    "Free expression" = "Institutional: Freedom of expression",
    "Political power anti" = "Institutional: Anti-Political power",
    "Political power" = "Institutional: Political power",
    "Regime anti" = "Anti-regime change",
    "Regime" = "Regime change",
    "Government" = "Remove government",
    "Government anti" = "Anti-remove government"
  )
  known <- lbl %in% names(lookup)
  lbl[known] <- unname(lookup[lbl[known]])

  # Remaining "<name> anti" labels become "Anti-<name>" ...
  lbl <- gsub("(.*) anti$", "Anti-\\1", lbl)
  # ... except that the "Main: " prefix is kept in front.
  anti_main <- grepl("^Anti-Main:", lbl)
  lbl[anti_main] <- gsub("Anti-Main: ", "Main: Anti-", lbl[anti_main])

  lbl <- gsub("gov$", "government", lbl)
  lbl <- gsub("Government ", "Government: ", lbl)
  lbl <- gsub("^Government:", "Remove government:", lbl)
  lbl <- gsub("hos only", "HOS only", lbl)

  # Listed labels come first; all others follow in order of appearance.
  lead_levels <- c("Democracy", "Autonomy", "Secession", "Regime change",
                   "Anti-regime change", "Institutional: Political power")
  factor(lbl, levels = c(lead_levels, setdiff(unique(lbl), lead_levels)))
})

# Sort rows by variable group, then by the label order defined above.
descdat <- descdat[order(descdat$vargroup, descdat$Variable_label), ]


#Remove some
# Drop the groups tagged "NotShow" and keep only the columns that are printed.
descdat <- descdat %>%
  dplyr::filter(vargroup != "NotShow") %>%
  select(vargroup, Variable_label, Min:SD)

#Hard-code number of digits
# Converting all seven statistics columns to text fixes how they are printed.
descdat <- descdat %>% mutate(across(Min:SD, as.character))


#Separate desctables
# Part 2 holds the social-group and violence-strategy variables, part 1 the rest.
descdat_sep1 <- descdat %>% dplyr::filter(!grepl("Social group|Violence strategy", vargroup))
descdat_sep2 <- descdat %>% dplyr::filter(grepl("Social group|Violence strategy", vargroup))



## Make table --------------------------------------------------------------

#Part 1
#Aesthetics
# Show each group name only on the first row of its group.
descdat_sep1$vargroup[duplicated(descdat_sep1$vargroup)] <- ""
colnames(descdat_sep1)[1:2] <- c("Group", "Variable")

# Horizontal rule above the first row of every group (row index - 1).
hlines1 <- which(descdat_sep1$Group != "") - 1 #Do this before beautiful groups

#Beautiful groups
# Split the long group name over two rows to keep the first column narrow.
# (The former "Violence strat" relabelling was removed from this part: that
# group is excluded from descdat_sep1 by construction, so it never matched.)
descdat_sep1$Group[grep("Org", descdat_sep1$Group) + 1] <- "participation"
descdat_sep1$Group[grep("Org", descdat_sep1$Group)] <- "Organizational"

#Print
# Writes the LaTeX table to Output/TableB1.tex.
print(xtable(descdat_sep1, caption = "Descriptive statistics for numeric variables",
             label = "tab:numeric_desc_part1",
             align = "l|l|l|ccccccc"),
      include.rownames = FALSE,
      table.placement = "!htb",
      size = "scriptsize",
      hline.after = hlines1,
      caption.placement = "top",
      file = "Output/TableB1.tex")

#Part 2
#Aesthetics
# Show each group name only on the first row of its group.
descdat_sep2$vargroup[duplicated(descdat_sep2$vargroup)] <- ""
colnames(descdat_sep2)[1:2] <- c("Group", "Variable")

# Horizontal rule above the first row of every group (row index - 1).
hlines2 <- which(descdat_sep2$Group != "") - 1 #Do this before beautiful groups

#Beautiful groups
# Split each long group name over two rows: first word(s) on the group's first
# row, the remainder on the row below. "Violence strategy" is handled here
# because this is the table that contains it. The second row is only written
# when it exists and belongs to the same group (its Group cell is still empty),
# so a one-row group can no longer overwrite the next group's header.
for (group_split in list(
  c("Social group dominate", "Social group", "dominate"),
  c("Social group participation", "Social group", "participation"),
  c("Social group originate", "Social group", "originate"),
  c("Violence strategy", "Violence", "strategy")
)) {
  first_row <- grep(group_split[1], descdat_sep2$Group)
  second_row <- first_row + 1
  second_row <- second_row[second_row <= nrow(descdat_sep2)]
  second_row <- second_row[descdat_sep2$Group[second_row] == ""]
  descdat_sep2$Group[second_row] <- group_split[3]
  descdat_sep2$Group[first_row] <- group_split[2]
}

# Writes the LaTeX table to Output/TableB2.tex.
print(xtable(descdat_sep2, caption = "Descriptive statistics for numeric variables continued - Social group and strategy variables",
             label = "tab:numeric_desc_part2",
             align = "l|l|l|ccccccc"),
      include.rownames = FALSE,
      table.placement = "!htb",
      size = "scriptsize",
      hline.after = hlines2,
      caption.placement = "top",
      file = "Output/TableB2.tex")


# Missingness -------------------------------------------------------------

## Allmissing --------------------------------------------------------------
# TRUE/FALSE missingness indicator for every cell of omg, stacked into one row
# per campaign x variable.
missdat <- data.frame(is.na(omg[ , ]))
# seq_len() instead of 1:nrow(): stays empty for an empty table instead of
# producing c(1, 0).
missdat$campaign_no <- seq_len(nrow(missdat))
missdat <- missdat %>%
  pivot_longer(cols = -campaign_no, names_to = "Variable", values_to = "Missing")

#Remove dummy-versions
# i.e. peak_* variables whose name ends in a digit, and the helper filter
# created earlier in this script.
missdat <- missdat[!grepl("peak_.*[0-9]$", missdat$Variable) &
                     missdat$Variable != "opposition_filter", ]

# Assign each variable to a display group. The first matching pattern decides
# the group, so the order of the rules matters; unmatched names get "NotShow".
missdat <- missdat %>%
  mutate(vargroup = case_when(
    grepl("^nv_|^v_|^strategy_camp", Variable) ~ "Violence strategy",
    grepl("atleast_", Variable)                ~ "Social group participation",
    grepl("dominate", Variable)                ~ "Social group dominate",
    grepl("originate", Variable)               ~ "Social group originate",
    grepl("org_", Variable)                    ~ "Org. participation",
    grepl("demand_", Variable)                 ~ "Demand",
    grepl("rel_", Variable)                    ~ "Religion",
    grepl("end_|start_|date_", Variable)       ~ "Time",
    grepl("coord_leaders", Variable)           ~ "Other",
    grepl("^peak_", Variable)                  ~ "Other",
    grepl("size_without_petition", Variable)   ~ "Other",
    grepl("ide_", Variable)                    ~ "Ideology",
    .default = "NotShow"
  ))


#Remove variables (do this before calculating missing)
# Any variable whose name contains one of these fragments is dropped.
missdat <- missdat %>%
  dplyr::filter(!grepl("change_note|demand_note|e_regionpol_6C|region|team", Variable))

#Calculate missing
# Per variable group: number of distinct variables, number of cells, number of
# missing cells and the resulting share; grouplabel is the facet title text.
missdat <- missdat %>%
  group_by(vargroup) %>%
  mutate(
    n_vars        = n_distinct(Variable),
    n_obs         = n(),
    n_missing     = sum(Missing),
    share_missing = n_missing / n_obs,
    grouplabel    = paste0(vargroup, " | Variables: ", n_vars, " | Pct. missing obs.: ", round(share_missing*100, 1), "%" )
  )

# Strip the variable-family prefixes one after another (in this order), then
# turn the remaining underscores into spaces.
missdat$Variable <- gsub(
  "_", " ",
  Reduce(
    function(nm, prefix) gsub(prefix, "", nm),
    c("rel_", "atleast_", "dominate_", "originate_", "^org_",
      "demand_", "^ide_", "^nv_", "^v_"),
    missdat$Variable
  )
)

# Text version of the logical indicator, used as the fill legend.
missdat$Missing <- if_else(missdat$Missing, "Missing", "Not missing")


#Variable names
# Build the display label for each variable: sentence-case the cleaned name,
# expand abbreviations, apply the exact-match lookup, rewrite "... anti" names
# as "Anti-...", and finally fix the ordering of the labels via factor levels.
missdat$Variable_label <- local({
  # Pattern-based rewrites, applied in sequence.
  lbl <- Reduce(
    function(x, rule) gsub(rule[1], rule[2], x),
    list(
      c("^Id", "ID"),
      c(" id", " ID"),
      c("[Cc]code", "CCODE"),
      c("prec$", "precision"),
      c("Civilrights", "Civil rights"),
      c("[Ss]ocialgroup", "Social group"),
      c("Polparty", "Political party"),
      c("^Main ", "Main: ")
    ),
    str_to_sentence(missdat$Variable)
  )

  # Exact-match replacements; labels not listed here are left unchanged.
  lookup <- c(
    "Demo" = "Democracy",
    "Coord leaders" = "Coordinated leadership",
    "Weapon acq" = "Weapon acquisition",
    "Indwork" = "Industrial workers",
    "Nonindurban" = "Non-industrial urban workers",
    "Pubemp" = "Public employees",
    "Business" = "Business elites",
    "Agrarianelites" = "Agrarian elites",
    "Urb middle class" = "Urban middle class",
    "Relethnic" = "Religious or ethnic",
    "Milemp" = "Military employees",
    "Laborunion" = "Labor union",
    "Militaryvet" = "Military veterans",
    "Womens" = "Women",
    "Militarygovt" = "Regime security forces",
    "Weapon train" = "Weapon training",
    "Civilsociety other" = "Civil society, other",
    "Democracy hr" = "Democracy and HR",
    "Rural" = "Rural workers",
    "Pride antipride campaign" = "Pride-antipride campaign",
    "Strategy camp" = "Campaign strategy",
    "Civil rights anti" = "Institutional: Anti-civil rights",
    "Civil rights" = "Institutional: Civil rights",
    "Election anti" = "Institutional: Anti-Electoral changes",
    "Election" = "Institutional: Electoral changes",
    "Executive anti" = "Institutional: Anti-constrain executive",
    "Executive" = "Institutional: Constrain executive",
    "Free expression anti" = "Institutional: Anti-freedom of expression",
    "Free expression" = "Institutional: Freedom of expression",
    "Political power anti" = "Institutional: Anti-Political power",
    "Political power" = "Institutional: Political power",
    "Regime anti" = "Anti-regime change",
    "Regime" = "Regime change",
    "Government" = "Remove government",
    "Government anti" = "Anti-remove government",
    "Main" = "Main demand"
  )
  known <- lbl %in% names(lookup)
  lbl[known] <- unname(lookup[lbl[known]])

  # Remaining "<name> anti" labels become "Anti-<name>" ...
  lbl <- gsub("(.*) anti$", "Anti-\\1", lbl)
  # ... except that the "Main: " prefix is kept in front.
  anti_main <- grepl("^Anti-Main:", lbl)
  lbl[anti_main] <- gsub("Anti-Main: ", "Main: Anti-", lbl[anti_main])

  lbl <- gsub("gov$", "government", lbl)
  lbl <- gsub("Government ", "Government: ", lbl)
  lbl <- gsub("^Government:", "Remove government:", lbl)
  lbl <- gsub("hos only", "HOS only", lbl)

  # Level order: the listed demand labels, then every "Institutional..." label,
  # then all remaining labels in order of appearance.
  lead_levels <- c("Democracy", "Autonomy", "Secession", "Regime change", "Anti-regime change",
                   "Remove government", "Anti-remove government", "Remove government: HOS only",
                   "Remove government: part of cabinet", "Remove government: cabinet", "Remove government: any",
                   unique(grep("^Institutional", lbl, value = TRUE)))
  factor(lbl, levels = c(lead_levels, setdiff(unique(lbl), lead_levels)))
})


# Sort rows by variable group, then by the label order defined above.
missdat <- missdat[order(missdat$vargroup, missdat$Variable_label), ]


### Separate missing figures for better space -------------------------------
# Part 2 holds the social-group and violence-strategy variables, part 1 the rest.
missdat_sep1 <- missdat %>% dplyr::filter(!grepl("Social group|Violence strategy", vargroup))
missdat_sep2 <- missdat %>% dplyr::filter(grepl("Social group|Violence strategy", vargroup))


#### Part 1 ------------------------------------------------------------------
# Missingness raster: one column per campaign, one row per variable; groups
# "NotShow" and "Time" are left out of the plot.
p <- ggplot(
  dplyr::filter(missdat_sep1, !vargroup %in% c("NotShow", "Time")),
  aes(x = campaign_no, y = Variable_label, fill = Missing)
) +
  geom_raster() +
  # light separator lines between the variable rows
  geom_hline(
    yintercept = seq(1, length(unique(missdat_sep1$Variable_label)), 1) + 0.5,
    color = "grey80"
  ) +
  labs(x = "Campaign number", y = "") +
  scale_x_continuous(expand = c(0,0)) +
  scale_y_discrete(limits=rev) +
  scale_fill_manual(values = c("black", "white")) +
  theme_classic() +
  theme(
    legend.position  = "bottom",
    legend.title     = element_blank(),
    legend.text      = element_text(size = 14),
    axis.title       = element_text(size = 14),
    axis.text.x      = element_text(size = 14),
    axis.text.y      = element_text(size = 16),
    panel.grid       = element_blank(),
    strip.text       = element_text(size = 14),
    strip.background = element_rect(fill = "grey60", color = "grey60")
  )

# Two facet layouts of the same plot: facet_grid (with space = "free_y") is
# built only to borrow its panel heights; facet_wrap is the version drawn.
p.grid <- p + facet_grid(grouplabel ~ ., scales = "free_y", space = "free_y")
p.wrap <- p + facet_wrap(~ grouplabel, ncol = 1, scales = "free_y")

# convert both into grob objects
gp.grid <- ggplotGrob(p.grid)
gp.wrap <- ggplotGrob(p.wrap)

# apply the panel heights of the facet_grid version to the facet_wrap one
# (the layout column "t" gives the row index of each panel in $heights)
gp.wrap$heights[gp.wrap$layout$t[grepl("panel", gp.wrap$layout$name)]] <-
  gp.grid$heights[gp.grid$layout$t[grepl("panel", gp.grid$layout$name)]]

# plot the facet_wrap version (Can't use ggsave)
jpeg("Output/FigureB3.jpg", width = 10, height = 13, units = "in", res = 350)
grid::grid.draw(gp.wrap)
dev.off()

#### Part 2 ------------------------------------------------------------------
# Missingness raster for the second set of variable groups: campaign number on
# the x axis, one row per variable label, filled by `Missing`.
# Rows in the "NotShow" and "Time" groups are left out of the plot.
p <- ggplot(subset(missdat_sep2, !(vargroup == "NotShow" | vargroup == "Time")),
            aes(x = campaign_no, y = Variable_label, fill = Missing)) +
  geom_raster() +
  # Light separator lines halfway between neighbouring variable rows
  geom_hline(yintercept = 0.5 + seq(from = 1,
                                    to = length(unique(missdat_sep2$Variable_label)),
                                    by = 1),
             color = "grey80") +
  scale_x_continuous(expand = c(0, 0)) +
  scale_y_discrete(limits = rev) +
  scale_fill_manual(values = c("black", "white")) +
  labs(x = "Campaign number", y = "") +
  theme_classic() +
  theme(panel.grid = element_blank(),
        axis.title = element_text(size = 14),
        axis.text.x = element_text(size = 14),
        axis.text.y = element_text(size = 16),
        strip.text = element_text(size = 14),
        strip.background = element_rect(fill = "grey60", color = "grey60"),
        legend.position = "bottom",
        legend.title = element_blank(),
        legend.text = element_text(size = 14))

# Two facetted variants of the same plot: facet_grid supplies panel heights
# proportional to the number of rows, facet_wrap supplies the layout we draw.
p.wrap <- p + facet_wrap(~ grouplabel, ncol = 1, scales = "free_y")
p.grid <- p + facet_grid(grouplabel ~ ., scales = "free_y", space = "free_y")

# Turn both variants into gtables so their layouts can be edited
gp.wrap <- ggplotGrob(p.wrap)
gp.grid <- ggplotGrob(p.grid)

# Copy the panel heights of the facet_grid version onto the facet_wrap version
gp.wrap$heights[gp.wrap$layout$t[grepl("panel", gp.wrap$layout$name)]] <-
  gp.grid$heights[gp.grid$layout$t[grepl("panel", gp.grid$layout$name)]]

# Draw the edited facet_wrap gtable straight to a JPEG device
# (the object is a gtable, so it is drawn with grid rather than ggsave)
jpeg(filename = "Output/FigureB4.jpg", width = 10, height = 13, units = "in", res = 350)
grid::grid.draw(gp.wrap)
dev.off()



# Global onset over time by strategy --------------------------------------
# Count campaign onsets per start year and strategy (violent = 1 / 0), keeping
# only campaigns with opposition_filter == 0.
global_trends <- omg %>%
  subset(opposition_filter == 0) %>%
  group_by(start_year, campaign_strategy_violent) %>%
  summarise(onsets = n())

# Pad to a complete year x strategy grid so years without onsets appear as 0
global_trends <- full_join(
  global_trends,
  expand.grid(start_year = 1787:2019, campaign_strategy_violent = 0:1)
)
global_trends$onsets[is.na(global_trends$onsets)] <- 0

# Order chronologically within strategy before accumulating
global_trends <- global_trends[with(global_trends, order(campaign_strategy_violent, start_year)), ]
global_trends <- global_trends %>%
  group_by(campaign_strategy_violent) %>%
  mutate(cumulative_onsets = cumsum(onsets))
global_trends$strategy <- with(
  global_trends,
  ifelse(campaign_strategy_violent == 1, "Violent", "Non-violent")
)


# Upper panel: running total of campaigns per strategy (legend suppressed here;
# the shared legend is taken from the lower panel)
cumulative <- ggplot(global_trends, aes(x = start_year, y = cumulative_onsets, color = strategy)) +
  geom_line() +
  geom_point() +
  scale_x_continuous(breaks = seq(1790, 2020, 20)) +
  scale_y_continuous(breaks = seq(0, 1000, 100)) +
  scale_color_manual(values = c("grey10", "grey60")) +
  labs(title = "Cumulative number of campaigns", x = "", y = "") +
  theme_minimal() +
  theme(plot.title = element_text(size = 22),
        axis.text = element_text(size = 22),
        legend.position = "none",
        legend.title = element_blank(),
        legend.text = element_text(size = 22))

# Lower panel: yearly onsets as stacked areas
onset <- ggplot(global_trends, aes(x = start_year, y = onsets, color = strategy, fill = strategy)) +
  geom_area(position = "stack") +
  scale_x_continuous(breaks = seq(1790, 2020, 20)) +
  scale_y_continuous(breaks = seq(0, 50, 10), limits = c(0, 50)) +
  scale_color_manual(values = c("grey10", "grey60")) +
  scale_fill_manual(values = c("grey10", "grey60")) +
  labs(title = "Onsets", x = "", y = "") +
  theme_minimal() +
  theme(plot.title = element_text(size = 22),
        axis.text = element_text(size = 22),
        legend.position = "right",
        legend.title = element_blank(),
        legend.text = element_text(size = 22))

# Stack the two panels (2:1 height ratio) with the legend of `onset` on the right
ggarrange(cumulative, onset,
          nrow = 2, ncol = 1, heights = c(2, 1),
          common.legend = TRUE, legend = "right", legend.grob = get_legend(onset))
ggsave(filename = "Output/Figure2.jpg", width = 16, height = 12, dpi = 350)


# Global onset over time by strategy with flank ---------------------------
# Four-way strategy label combining the main strategy with the presence of a
# flank using the other strategy. Combinations matching none of the four
# cases are labelled "What?" and dropped below.
omg$strategy_with_flank <- with(
  omg,
  ifelse(campaign_strategy_violent == 1 & v_nonviolent_flank == 0, "Violent",
  ifelse(campaign_strategy_violent == 1 & v_nonviolent_flank == 1, "Violent with\nnon-violent flank",
  ifelse(campaign_strategy_violent == 0 & nv_violent_flank == 0, "Non-violent",
  ifelse(campaign_strategy_violent == 0 & nv_violent_flank == 1, "Non-violent with\nviolent flank",
         "What?"))))
)


# Onsets per start year and strategy label, for campaigns with
# opposition_filter == 0 and a classifiable strategy
global_trends <- omg %>%
  subset(strategy_with_flank != "What?" & opposition_filter == 0) %>%
  group_by(start_year, strategy_with_flank) %>%
  summarise(onsets = n())

# Pad to a complete year x label grid so years without onsets appear as 0
global_trends <- full_join(
  global_trends,
  expand.grid(start_year = 1787:2019,
              strategy_with_flank = unique(global_trends$strategy_with_flank))
)
global_trends$onsets[is.na(global_trends$onsets)] <- 0

# Order chronologically within label before accumulating
global_trends <- global_trends[with(global_trends, order(strategy_with_flank, start_year)), ]
global_trends <- global_trends %>%
  group_by(strategy_with_flank) %>%
  mutate(cumulative_onsets = cumsum(onsets))

# Fix the legend / colour order
global_trends$strategy_with_flank <- factor(
  global_trends$strategy_with_flank,
  levels = c("Violent", "Non-violent", "Violent with\nnon-violent flank", "Non-violent with\nviolent flank")
)

# Upper panel: running total of campaigns per label (legend suppressed here;
# the shared legend is taken from the lower panel)
cumulative <- ggplot(global_trends, aes(x = start_year, y = cumulative_onsets, color = strategy_with_flank)) +
  geom_line() +
  geom_point() +
  scale_x_continuous(breaks = seq(1790, 2020, 20)) +
  scale_y_continuous(breaks = seq(0, 1000, 100)) +
  scale_color_manual(values = c("grey0", "grey30", "grey60", "grey80")) +
  labs(title = "Cumulative number of campaigns", x = "", y = "") +
  theme_minimal() +
  theme(plot.title = element_text(size = 22),
        axis.text = element_text(size = 22),
        legend.position = "none",
        legend.title = element_blank(),
        legend.text = element_text(size = 22))

# Lower panel: yearly onsets as stacked areas
onset <- ggplot(global_trends,
                aes(x = start_year, y = onsets, color = strategy_with_flank, fill = strategy_with_flank)) +
  geom_area(position = "stack") +
  scale_x_continuous(breaks = seq(1790, 2020, 20)) +
  scale_y_continuous(breaks = seq(0, 50, 10), limits = c(0, 50)) +
  scale_color_manual(values = c("grey0", "grey30", "grey60", "grey80")) +
  scale_fill_manual(values = c("grey0", "grey30", "grey60", "grey80")) +
  labs(title = "Onsets", x = "", y = "") +
  theme_minimal() +
  theme(plot.title = element_text(size = 22),
        axis.text = element_text(size = 22),
        legend.position = "right",
        legend.title = element_blank(),
        legend.text = element_text(size = 22))

# Stack the two panels (2:1 height ratio) with the legend of `onset` on the right
ggarrange(cumulative, onset,
          nrow = 2, ncol = 1, heights = c(2, 1),
          common.legend = TRUE, legend = "right", legend.grob = get_legend(onset))
ggsave(filename = "Output/FigureB5.jpg", width = 14, height = 10, dpi = 350)



# Dominating social group -------------------------------------------------
# Long table of the `dominate*` indicator columns, summed per start year and
# group (missing values ignored in the sum).
dominate_trends <- omg %>%
  select("start_year", starts_with("dominate")) %>%
  pivot_longer(cols = starts_with("dominate"), names_to = "dominate_group") %>%
  group_by(start_year, dominate_group) %>%
  summarise(onsets = sum(value, na.rm = TRUE))

# Pad to a complete year x group grid so years without onsets appear as 0
dominate_trends <- full_join(
  dominate_trends,
  expand.grid(start_year = 1787:2019,
              dominate_group = unique(dominate_trends$dominate_group))
)
dominate_trends$onsets[is.na(dominate_trends$onsets)] <- 0

# Order chronologically within group before accumulating
dominate_trends <- dominate_trends[with(dominate_trends, order(dominate_group, start_year)), ]
dominate_trends <- dominate_trends %>%
  group_by(dominate_group) %>%
  mutate(cumulative_onsets = cumsum(onsets))

#Remove relethnic since it can be combined with the other groups
dominate_trends <- subset(dominate_trends, dominate_group != "dominate_relethnic")

#Aesthetics: strip the column prefix and map the short names to display labels
dominate_trends$dominate_group <- sub("^dominate_", "", dominate_trends$dominate_group)
dominate_trends$dominate_group <- case_match(
  dominate_trends$dominate_group,
  "agrarianelites"   ~ "Agrarian elites",
  "business"         ~ "Business elites",
  "milemp"           ~ "Military",
  "intellectuals"    ~ "Intellectuals",
  "nonindurban"      ~ "Non-industrial urban workers",
  "indwork"          ~ "Industrial workers",
  "peasant"          ~ "Peasants",
  "professionals"    ~ "Professionals",
  "pubemp"           ~ "Public employees",
  "relethnic"        ~ "Religious/Ethnic",
  "rural"            ~ "Rural groups",
  "students"         ~ "Students",
  "urb_middle_class" ~ "Urban middle class",
  "workers_general"  ~ "Workers in general"
)

#Sorting by 2019 size: factor levels follow the cumulative count in 2019
tmp <- subset(dominate_trends, start_year == 2019)
dominate_trends$dominate_group <- factor(
  dominate_trends$dominate_group,
  levels = tmp$dominate_group[order(tmp$cumulative_onsets)]
)

#Linegraph
# Label text and y position are filled in only on each group's last year, so
# every line gets exactly one label at its right end.
end_values <- dominate_trends %>%
  mutate(label = ifelse(start_year == max(start_year), as.character(dominate_group), NA),
         end_value = ifelse(start_year == max(start_year), cumulative_onsets, NA))

# Hand-tuned vertical offsets that keep neighbouring end-of-line labels apart
label_nudges <- c("Workers in general"           =  0,
                  "Military"                     =  2,
                  "Intellectuals"                = -5,
                  "Peasants"                     = -1,
                  "Students"                     =  6,
                  "Professionals"                =  5,
                  "Urban middle class"           =  2,
                  "Industrial workers"           = -2,
                  "Agrarian elites"              =  5,
                  "Non-industrial urban workers" =  4,
                  "Rural groups"                 =  4,
                  "Public employees"             = -2,
                  "Business elites"              = -6)

ggplot(end_values, aes(x = start_year, y = cumulative_onsets, color = dominate_group)) +
  geom_line(linewidth = 2) +
  geom_text(aes(color = dominate_group, x = 2020, y = end_value, label = label),
            show.legend = FALSE, size = 8, hjust = 0, vjust = 0.5,
            # One offset per row: groups without an entry get 0, a missing
            # group stays NA
            nudge_y = ifelse(is.na(end_values$dominate_group), NA,
                             coalesce(unname(label_nudges[as.character(end_values$dominate_group)]), 0))) +
  scale_x_continuous(breaks = seq(1790, 2020, 30)) +
  scale_y_continuous(breaks = seq(0, 220, 20)) +
  scale_color_manual(values = c25) +
  labs(x = "", y = "") +
  theme_minimal() +
  # Wide right margin plus clip = "off" leaves room for the labels drawn
  # outside the panel
  theme(plot.margin = unit(c(1, 18, 1, 1), "lines"),
        axis.text = element_text(size = 22),
        legend.position = "bottom",
        legend.title = element_blank(),
        legend.text = element_text(size = 22)) +
  coord_cartesian(expand = TRUE, clip = "off")
ggsave(filename = "Output/Figure5.jpg", width = 18, height = 10, dpi = 350)


# Dominating social group but only opposition -----------------------------
# Long table of the `dominate*` indicator columns, summed per start year and
# group (missing values ignored in the sum), restricted to campaigns with
# opposition_filter == 0.
dominate_trends <- omg %>%
  subset(opposition_filter == 0) %>%
  select("start_year", starts_with("dominate")) %>%
  pivot_longer(cols = starts_with("dominate"), names_to = "dominate_group") %>%
  group_by(start_year, dominate_group) %>%
  summarise(onsets = sum(value, na.rm = TRUE))

# Pad to a complete year x group grid so years without onsets appear as 0
dominate_trends <- full_join(
  dominate_trends,
  expand.grid(start_year = 1787:2019,
              dominate_group = unique(dominate_trends$dominate_group))
)
dominate_trends$onsets[is.na(dominate_trends$onsets)] <- 0

# Order chronologically within group before accumulating
dominate_trends <- dominate_trends[with(dominate_trends, order(dominate_group, start_year)), ]
dominate_trends <- dominate_trends %>%
  group_by(dominate_group) %>%
  mutate(cumulative_onsets = cumsum(onsets))

#Remove relethnic since it can be combined with the other groups
dominate_trends <- subset(dominate_trends, dominate_group != "dominate_relethnic")

#Aesthetics: strip the column prefix and map the short names to display labels
dominate_trends$dominate_group <- sub("^dominate_", "", dominate_trends$dominate_group)
dominate_trends$dominate_group <- case_match(
  dominate_trends$dominate_group,
  "agrarianelites"   ~ "Agrarian elites",
  "business"         ~ "Business elites",
  "milemp"           ~ "Military",
  "intellectuals"    ~ "Intellectuals",
  "nonindurban"      ~ "Non-industrial urban workers",
  "indwork"          ~ "Industrial workers",
  "peasant"          ~ "Peasants",
  "professionals"    ~ "Professionals",
  "pubemp"           ~ "Public employees",
  "relethnic"        ~ "Religious/Ethnic",
  "rural"            ~ "Rural groups",
  "students"         ~ "Students",
  "urb_middle_class" ~ "Urban middle class",
  "workers_general"  ~ "Workers in general"
)

#Sorting by 2019 size: factor levels follow the cumulative count in 2019
tmp <- subset(dominate_trends, start_year == 2019)
dominate_trends$dominate_group <- factor(
  dominate_trends$dominate_group,
  levels = tmp$dominate_group[order(tmp$cumulative_onsets)]
)


#Linegraph
# Label text and y position are filled in only on each group's last year, so
# every line gets exactly one label at its right end.
end_values <- dominate_trends %>%
  mutate(label = ifelse(start_year == max(start_year), as.character(dominate_group), NA),
         end_value = ifelse(start_year == max(start_year), cumulative_onsets, NA))

# Hand-tuned vertical offsets that keep neighbouring end-of-line labels apart
label_nudges <- c("Workers in general"           =  0,
                  "Military"                     =  2,
                  "Intellectuals"                = -5,
                  "Peasants"                     = -1,
                  "Students"                     =  6,
                  "Professionals"                =  5,
                  "Urban middle class"           =  2,
                  "Industrial workers"           = -2,
                  "Agrarian elites"              =  5,
                  "Non-industrial urban workers" =  4,
                  "Rural groups"                 =  4,
                  "Public employees"             = -2,
                  "Business elites"              = -6)

ggplot(end_values, aes(x = start_year, y = cumulative_onsets, color = dominate_group)) +
  geom_line(linewidth = 2) +
  geom_text(aes(color = dominate_group, x = 2020, y = end_value, label = label),
            show.legend = FALSE, size = 8, hjust = 0, vjust = 0.5,
            # One offset per row: groups without an entry get 0, a missing
            # group stays NA
            nudge_y = ifelse(is.na(end_values$dominate_group), NA,
                             coalesce(unname(label_nudges[as.character(end_values$dominate_group)]), 0))) +
  scale_x_continuous(breaks = seq(1790, 2020, 30)) +
  scale_y_continuous(breaks = seq(0, 220, 20)) +
  scale_color_manual(values = c25) +
  labs(x = "", y = "") +
  theme_minimal() +
  # Wide right margin plus clip = "off" leaves room for the labels drawn
  # outside the panel
  theme(plot.margin = unit(c(1, 18, 1, 1), "lines"),
        axis.text = element_text(size = 22),
        legend.position = "bottom",
        legend.title = element_blank(),
        legend.text = element_text(size = 22)) +
  coord_cartesian(expand = TRUE, clip = "off")
ggsave(filename = "Output/FigureB9.jpg", width = 18, height = 10, dpi = 350)


# Trends in organization --------------------------------------------------
# Coerce every column whose name starts with "org_" to numeric and recode the
# value -99 to 0; all other values are kept as they are.
# NOTE(review): starts_with("org_") would also match any org_*_name columns
# (later sections drop columns ending in "_name"); if those hold text,
# as.numeric() turns them into NA here -- confirm this is intended.
omg <- omg %>%
  ungroup() %>%
  mutate(across(starts_with("org_"), function(org_col) {
    org_num <- as.numeric(org_col)
    case_match(org_num, -99 ~ 0, .default = org_num)
  }))

## Organizations absolute --------------------------------------------------
# Yearly and cumulative number of campaigns per participating organization
# type, computed over ALL campaigns in `omg` (no opposition filter).
# NOTE(review): `org_trends` and `tmp` are reassigned by the next section before
# anything in the visible part of the script uses this result -- confirm whether
# this table is still needed.
org_trends <- omg %>%
  select(-ends_with("_name")) %>%
  select(c("start_year", "coord_leaders", starts_with("org_"))) %>%
  pivot_longer(cols = starts_with("org_"), names_to = "organization")

# Campaigns per start year and organization type (missing values ignored)
org_trends <- org_trends %>%
  group_by(start_year, organization) %>%
  summarise(onsets = sum(value, na.rm = TRUE))

# Pad to a complete year x organization grid so years without onsets appear as 0
org_trends <- full_join(org_trends,
                        expand.grid("start_year" = 1787:2019, "organization" = unique(org_trends$organization)))
org_trends$onsets[is.na(org_trends$onsets)] <- 0

# Denominator: yearly number of campaigns. The counts above include every
# campaign in `omg`, so they are joined with the all-campaign totals
# (`all_campaigns_trend_incl_pro`) rather than the opposition-only
# `all_campaigns_trend`, which would understate the denominator.
org_trends <- full_join(org_trends, all_campaigns_trend_incl_pro)
org_trends$org_share_of_onsets <- org_trends$onsets/org_trends$campaigns
# Years without any campaign would give 0/0; set their share to 0 instead
org_trends$org_share_of_onsets[which(org_trends$campaigns==0)] <- 0

# Order chronologically within organization type before accumulating
org_trends <- org_trends[order(org_trends$organization, org_trends$start_year),]
org_trends <- org_trends %>% group_by(organization) %>% mutate(cumulative_onsets = cumsum(onsets),
                                                               cumulative_share_of_onsets = cumsum(org_share_of_onsets))


#Aesthetics: strip the column prefix and map the short names to display labels
org_trends$organization <- gsub("^org_", "", org_trends$organization)

org_trends$organization <- case_match(org_trends$organization,
                                      "womens" ~ "Women's organization",
                                      "student" ~ "Student organization",
                                      "religious" ~ "Religious group",
                                      "rebels" ~ "Rebel group",
                                      "polparty" ~ "Political party",
                                      "other" ~ "Other organization",
                                      "militaryvet" ~ "Military veterans",
                                      "militarygovt" ~ "Military/Government",
                                      "laborunion" ~ "Labor union",
                                      "civilsociety_other" ~ "Other CSO")

#Sorting by 2019 size: factor levels follow the cumulative count in 2019
tmp <- org_trends[which(org_trends$start_year==2019),]
org_trends$organization <- factor(org_trends$organization, levels = tmp$organization[order(tmp$cumulative_onsets)])

### Share of total campaigns with organization ------------------------------
# Per-campaign table with the start year, the coordinated-leadership indicator
# and the org_* columns, without the columns whose name ends in "_name".
org_trends <- omg %>% select("start_year", "coord_leaders", starts_with("org_"))
org_trends <- org_trends[, !grepl("_name$", colnames(org_trends))]

# Row sum of the org_* columns per campaign, and a 0/1 flag for "at least one"
org_trends$n_orgs <- rowSums(org_trends[, grepl("^org_", colnames(org_trends))], na.rm = TRUE)
org_trends$any_org <- ifelse(org_trends$n_orgs > 0, 1, 0)

# Yearly aggregates
org_trends <- org_trends %>%
  group_by(start_year) %>%
  summarise(ave_social_groups = mean(n_orgs),
            campaigns_with_org = sum(any_org),
            campaigns_with_coordinated_leader = sum(coord_leaders))

# Add the years without any campaign and count them as 0
org_trends <- full_join(org_trends, tibble(start_year = 1787:2019))
org_trends$campaigns_with_org[is.na(org_trends$campaigns_with_org)] <- 0
org_trends$campaigns_with_coordinated_leader[is.na(org_trends$campaigns_with_coordinated_leader)] <- 0

# Running totals in chronological order
org_trends <- org_trends %>%
  arrange(start_year) %>%
  mutate(cumulative_campaigns_with_org = cumsum(campaigns_with_org),
         cumulative_campaigns_with_coordinated_leader = cumsum(campaigns_with_coordinated_leader))

# Attach the yearly and cumulative totals over all campaigns, then express the
# counts as shares of those totals (cumulative shares first, yearly shares after)
org_trends <- org_trends %>%
  full_join(all_campaigns_trend_incl_pro) %>%
  mutate(share_organized = cumulative_campaigns_with_org / cumulative_campaigns,
         share_coordinated = cumulative_campaigns_with_coordinated_leader / cumulative_campaigns,
         onset_share_organized = campaigns_with_org / campaigns,
         onset_share_coordinated = campaigns_with_coordinated_leader / campaigns)


#### Absolute -----------------------------------------------------------
# Long version of the cumulative columns with readable series names
tmp <- org_trends %>%
  select("start_year", starts_with("cumulative_")) %>%
  pivot_longer(cols = starts_with("cumulative_"), names_to = "campaign_subset") %>%
  mutate(campaign_subset = case_match(
    campaign_subset,
    "cumulative_campaigns" ~ "All campaigns",
    "cumulative_campaigns_with_coordinated_leader" ~ "With coordinated leadership",
    "cumulative_campaigns_with_org" ~ "With organizational participation"
  ))



#### Decade-shares -----------------------------------------------------------
# Aggregate the yearly counts by decade (the last digit of the year is replaced
# by "0s", e.g. 1787 -> "1780s") and express them as shares of all campaigns
# started in that decade, one row per decade and share type.
tmp <- org_trends %>%
  mutate(decade = gsub("(...).$", "\\10s", start_year)) %>%
  group_by(decade) %>%
  summarise(campaigns_with_coordinated_leader = sum(campaigns_with_coordinated_leader),
            campaigns_with_org = sum(campaigns_with_org),
            campaigns = sum(campaigns)) %>%
  mutate(Coordinated = campaigns_with_coordinated_leader / campaigns,
         Organized = campaigns_with_org / campaigns) %>%
  pivot_longer(cols = c(Coordinated, Organized), names_to = "share_type", values_to = "Share")

ggplot(tmp, aes(x = decade, y = Share, color = share_type, group = share_type)) +
  geom_line(linewidth = 2) +
  geom_point(size = 5, alpha = 0.5, show.legend = FALSE) +
  # Value labels rounded to two decimals, printed without the leading zero
  geom_text(aes(label = sub("^0", "", round(Share, 2))),
            color = "black", size = 8, hjust = 0, vjust = -1, nudge_x = -0.3,
            show.legend = FALSE) +
  scale_color_manual(values = c("gray30", "gray60"),
                     labels = c("Share of campaigns with coordinated leadership",
                                "Share of campaigns with organizational participation")) +
  # Tick label for every second decade only
  scale_x_discrete(breaks = paste0(seq(1780, 2010, 20), "s")) +
  scale_y_continuous(limits = c(0, 1)) +
  labs(x = "Decade", y = "Decade-share of all campaigns") +
  theme_minimal() +
  theme(axis.title = element_text(size = 22),
        axis.text = element_text(size = 22),
        legend.position = "bottom",
        legend.title = element_blank(),
        legend.text = element_text(size = 22))
ggsave(filename = "Output/Figure6.jpg", width = 20, height = 10, dpi = 350)


### Share of total campaigns with organization opposition only --------------
# Per-campaign table restricted to campaigns with opposition_filter == 0: start
# year, the coordinated-leadership indicator and the org_* columns, without the
# columns whose name ends in "_name".
org_trends <- omg %>%
  subset(opposition_filter == 0) %>%
  select("start_year", "coord_leaders", starts_with("org_"))
org_trends <- org_trends[, !grepl("_name$", colnames(org_trends))]

# Row sum of the org_* columns per campaign, and a 0/1 flag for "at least one"
org_trends$n_orgs <- rowSums(org_trends[, grepl("^org_", colnames(org_trends))], na.rm = TRUE)
org_trends$any_org <- ifelse(org_trends$n_orgs > 0, 1, 0)

# Yearly aggregates
org_trends <- org_trends %>%
  group_by(start_year) %>%
  summarise(ave_social_groups = mean(n_orgs),
            campaigns_with_org = sum(any_org),
            campaigns_with_coordinated_leader = sum(coord_leaders))

# Add the years without any campaign and count them as 0
org_trends <- full_join(org_trends, tibble(start_year = 1787:2019))
org_trends$campaigns_with_org[is.na(org_trends$campaigns_with_org)] <- 0
org_trends$campaigns_with_coordinated_leader[is.na(org_trends$campaigns_with_coordinated_leader)] <- 0

# Running totals in chronological order
org_trends <- org_trends %>%
  arrange(start_year) %>%
  mutate(cumulative_campaigns_with_org = cumsum(campaigns_with_org),
         cumulative_campaigns_with_coordinated_leader = cumsum(campaigns_with_coordinated_leader))

# Attach the yearly and cumulative campaign totals from `all_campaigns_trend`,
# then express the counts as shares of those totals (cumulative shares first,
# yearly shares after)
org_trends <- org_trends %>%
  full_join(all_campaigns_trend) %>%
  mutate(share_organized = cumulative_campaigns_with_org / cumulative_campaigns,
         share_coordinated = cumulative_campaigns_with_coordinated_leader / cumulative_campaigns,
         onset_share_organized = campaigns_with_org / campaigns,
         onset_share_coordinated = campaigns_with_coordinated_leader / campaigns)


#### Absolute ----------------------------------------------------------------


# Long version of the cumulative columns with readable series names
tmp <- org_trends %>%
  select("start_year", starts_with("cumulative_")) %>%
  pivot_longer(cols = starts_with("cumulative_"), names_to = "campaign_subset") %>%
  mutate(campaign_subset = case_match(
    campaign_subset,
    "cumulative_campaigns" ~ "All campaigns",
    "cumulative_campaigns_with_coordinated_leader" ~ "With coordinated leadership",
    "cumulative_campaigns_with_org" ~ "With organizational participation"
  ))


#### Decade-shares -----------------------------------------------------------
# Aggregate the yearly counts by decade (the last digit of the year is replaced
# by "0s", e.g. 1787 -> "1780s") and express them as shares of the campaigns
# counted for that decade, one row per decade and share type.
tmp <- org_trends %>%
  mutate(decade = gsub("(...).$", "\\10s", start_year)) %>%
  group_by(decade) %>%
  summarise(campaigns_with_coordinated_leader = sum(campaigns_with_coordinated_leader),
            campaigns_with_org = sum(campaigns_with_org),
            campaigns = sum(campaigns)) %>%
  mutate(Coordinated = campaigns_with_coordinated_leader / campaigns,
         Organized = campaigns_with_org / campaigns) %>%
  pivot_longer(cols = c(Coordinated, Organized), names_to = "share_type", values_to = "Share")


ggplot(tmp, aes(x = decade, y = Share, color = share_type, group = share_type)) +
  geom_line(linewidth = 2) +
  geom_point(size = 5, alpha = 0.5, show.legend = FALSE) +
  # Value labels rounded to two decimals, printed without the leading zero
  geom_text(aes(label = sub("^0", "", round(Share, 2))),
            color = "black", size = 8, hjust = 0, vjust = -1, nudge_x = -0.3,
            show.legend = FALSE) +
  scale_color_manual(values = c("gray30", "gray60"),
                     labels = c("Share of campaigns with coordinated leadership",
                                "Share of campaigns with organizational participation")) +
  # Tick label for every second decade only
  scale_x_discrete(breaks = paste0(seq(1780, 2010, 20), "s")) +
  scale_y_continuous(limits = c(0, 1)) +
  labs(x = "Decade", y = "Decade-share of all campaigns") +
  theme_minimal() +
  theme(axis.title = element_text(size = 22),
        axis.text = element_text(size = 22),
        legend.position = "bottom",
        legend.title = element_blank(),
        legend.text = element_text(size = 22))
ggsave(filename = "Output/FigureB10.jpg", width = 20, height = 10, dpi = 350)

# Ideology ----------------------------------------------------------------
# Long table of the `ide_*` indicator columns, summed per start year and
# ideology (missing values ignored in the sum).
ide_trends <- omg %>%
  select("start_year", starts_with("ide_")) %>%
  pivot_longer(cols = starts_with("ide_"), names_to = "ideology") %>%
  group_by(start_year, ideology) %>%
  summarise(onsets = sum(value, na.rm = TRUE))

# Pad to a complete year x ideology grid so years without onsets appear as 0
ide_trends <- full_join(
  ide_trends,
  expand.grid(start_year = 1787:2019, ideology = unique(ide_trends$ideology))
)
ide_trends$onsets[is.na(ide_trends$onsets)] <- 0

# Order chronologically within ideology before accumulating
ide_trends <- ide_trends[with(ide_trends, order(ideology, start_year)), ]
ide_trends <- ide_trends %>%
  group_by(ideology) %>%
  mutate(cumulative_onsets = cumsum(onsets))

#Aesthetics: strip the column prefix and map the short names to display
#labels; names without a mapping keep their stripped form
ide_trends$ideology <- sub("^ide_", "", ide_trends$ideology)
ide_trends$ideology <- case_match(
  ide_trends$ideology,
  "conservative"      ~ "Conservativism",
  "democracy_hr"      ~ "Democracy and human rights",
  "ethnicity"         ~ "Ethnicism",
  "nationalist"       ~ "Nationalism",
  "republican"        ~ "Republicanism",
  "socialist_marxist" ~ "Socialism/Marxism",
  .default = ide_trends$ideology
)

# Factor levels follow the cumulative count in 2019
tmp <- subset(ide_trends, start_year == 2019)
ide_trends$ideology <- factor(ide_trends$ideology,
                              levels = tmp$ideology[order(tmp$cumulative_onsets)])

#Add total number of campaigns
ide_trends <- full_join(ide_trends, all_campaigns_trend_incl_pro)


#Plot cumulative
# Figure 3: cumulative onsets per ideology. `label` and `end_value` are filled
# only on each ideology's last year, so geom_text() draws one label per line,
# placed at x = 2020 to the right of the series (clipping is off and the right
# margin is widened to make room for it).
ide_trends %>%
  mutate(
    label = ifelse(start_year == max(start_year), as.character(ideology), NA),
    end_value = ifelse(start_year == max(start_year), cumulative_onsets, NA)
  ) %>%
  ggplot(aes(x = start_year, y = cumulative_onsets, colour = ideology, group = ideology)) +
  geom_line(linewidth = 2) +
  geom_text(
    aes(colour = ideology, x = 2020, y = end_value,
        label = gsub(" and ", " and \n", label)),
    size = 8, hjust = 0, vjust = 0.5, show.legend = FALSE
  ) +
  scale_x_continuous(breaks = seq(1790, 2019, 20)) +
  scale_y_continuous(breaks = seq(0, 1700, 100)) +
  scale_colour_manual(values = c25) +
  labs(x = "", y = "") +
  theme_minimal() +
  theme(
    plot.margin = unit(c(1, 12, 1, 1), "lines"),
    axis.text = element_text(size = 22),
    legend.text = element_text(size = 22),
    legend.title = element_blank(),
    legend.position = "bottom"
  ) +
  coord_cartesian(expand = TRUE, clip = "off")
ggsave(filename = "Output/Figure3.jpg", width = 16, height = 9, dpi = 350)


### 10-year periods ---------------------------------------------------------
# Trailing ten-year onset counts per ideology: add lags 1-9 of `onsets`
# within each ideology and sum them together with the current year.
# lag() follows row order, so this relies on the rows being sorted by
# start_year within ideology.
ide_trends_lag <- ide_trends %>%
  group_by(ideology) %>%
  mutate(
    onsets_lag1 = lag(onsets, 1),
    onsets_lag2 = lag(onsets, 2),
    onsets_lag3 = lag(onsets, 3),
    onsets_lag4 = lag(onsets, 4),
    onsets_lag5 = lag(onsets, 5),
    onsets_lag6 = lag(onsets, 6),
    onsets_lag7 = lag(onsets, 7),
    onsets_lag8 = lag(onsets, 8),
    onsets_lag9 = lag(onsets, 9)
  )

# Sum `onsets` and all its lag columns row by row; the missing lags at the
# start of each series are skipped by na.rm.
ide_trends_lag$onsets_10year <- rowSums(
  ide_trends_lag[, grep("^onsets", names(ide_trends_lag), value = TRUE)],
  na.rm = TRUE
)

# Same rows as ide_trends_lag, with label/end_value filled only on each
# ideology's last year.
end_values <- ide_trends_lag %>%
  mutate(
    label = ifelse(start_year == max(start_year), as.character(ideology), NA),
    end_value = ifelse(start_year == max(start_year), onsets_10year, NA)
  )

# Figure 4: trailing ten-year onset counts per ideology, one line each, with
# the ideology name written at x = 2020 next to the last value of its line.
#
# The label positions are nudged vertically through a vector built from
# `end_values` (not from the piped data), so this relies on `end_values`
# having exactly the same rows in the same order as the data being plotted.
# The nudges are keyed on hard-coded end-of-series counts (160, 2, 4, 11, 47,
# 48): they only apply while the data produce exactly these values.
# NOTE(review): any other non-missing end value falls through to the last
# ifelse() branch and is nudged by its own value (a no-op only when that value
# is 0) -- confirm this is intended.
ide_trends_lag %>% mutate(label = ifelse(start_year == max(start_year), as.character(ideology), NA),
                          end_value = ifelse(start_year == max(start_year), onsets_10year, NA)) %>%
  ggplot(aes(x = start_year, y = onsets_10year, color = ideology)) + 
  geom_line(linewidth = 2) +
  scale_x_continuous(breaks = seq(1790, 2020, 30)) +
  scale_y_continuous(breaks = seq(0, 200, 10)) +
  geom_text(aes(color = ideology, x = 2020,
                y = end_value, label = gsub(" and ", " and \n", label)), show.legend = FALSE,
            hjust = 0, size = 8, vjust = 0.5, nudge_y = ifelse(end_values$end_value == 160, 0,
                                                               ifelse(end_values$end_value == 2, -2,
                                                                      ifelse(end_values$end_value == 4, +3,
                                                                             ifelse(end_values$end_value == 11, +4, 
                                                                                    ifelse(end_values$end_value == 47, -2,
                                                                                           ifelse(end_values$end_value == 48, +7, end_values$end_value))))))) +
  ylab("") + xlab("") +
  scale_color_manual(values = c25) +
  theme_minimal() +
  # Wide right margin plus clip = "off" below leave room for the end labels.
  theme(plot.margin = unit(c(1,12,1,1), "lines"),
        legend.position =  "bottom",
        legend.title = element_blank(),
        axis.text = element_text(size = 22),
        legend.text = element_text(size = 22)) +
  coord_cartesian(expand = TRUE, clip = "off")
ggsave("Output/Figure4.jpg", dpi = 350, width = 18, height = 10)


#Share over 10 years
# Ten-year onsets per ideology divided by the `campaigns_10year_sum` column
# that ide_trends_lag already carries.
ide_trends_lag$share_10year <-
  ide_trends_lag$onsets_10year / ide_trends_lag$campaigns_10year_sum

# Figure B6: the ten-year shares from 1800 onwards.
ide_trends_lag %>%
  subset(start_year > 1799) %>%
  ggplot(aes(x = start_year, y = share_10year, colour = ideology)) +
  geom_line(linewidth = 2) +
  scale_x_continuous(breaks = seq(1800, 2020, 20)) +
  scale_y_continuous(breaks = seq(0, 1, 0.1), limits = c(0, 0.9)) +
  scale_colour_manual(values = c25) +
  labs(x = "", y = "") +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 22),
    legend.text = element_text(size = 22),
    legend.title = element_blank()
  )
ggsave(filename = "Output/FigureB6.jpg", width = 18, height = 10, dpi = 350)


# Ideology opposition only ------------------------------------------------
# Same yearly ideology counts as for the full data, restricted to campaigns
# with opposition_filter == 0.
# NOTE(review): confirm against the filter's definition that 0 is the value
# marking the campaigns meant to be kept here.
ide_trends <- omg %>%
  filter(opposition_filter == 0) %>%
  select(start_year, starts_with("ide_")) %>%
  pivot_longer(cols = starts_with("ide_"), names_to = "ideology") %>%
  group_by(start_year, ideology) %>%
  summarise(onsets = sum(value, na.rm = TRUE))

# Pad with every year x ideology combination for 1787-2019 so that years
# without any onset show up as explicit zeros, then accumulate per ideology.
ide_trends <- full_join(
  ide_trends,
  expand.grid("start_year" = 1787:2019, "ideology" = unique(ide_trends$ideology))
)
ide_trends <- ide_trends[order(ide_trends$ideology, ide_trends$start_year), ]
ide_trends$onsets[is.na(ide_trends$onsets)] <- 0
ide_trends <- ide_trends %>%
  group_by(ideology) %>%
  mutate(cumulative_onsets = cumsum(onsets))

#Aesthetics
# Drop the "ide_" prefix and swap in display names; ideologies without an
# explicit display name keep their stripped column name.
ide_trends$ideology <- sub("^ide_", "", ide_trends$ideology)
ide_trends$ideology <- case_match(
  ide_trends$ideology,
  "conservative"      ~ "Conservativism",
  "democracy_hr"      ~ "Democracy and human rights",
  "ethnicity"         ~ "Ethnicism",
  "nationalist"       ~ "Nationalism",
  "republican"        ~ "Republicanism",
  "socialist_marxist" ~ "Socialism/Marxism",
  .default = ide_trends$ideology
)
# Order the factor levels by the cumulative count reached in 2019.
tmp <- subset(ide_trends, start_year == 2019)
ide_trends$ideology <- factor(
  ide_trends$ideology,
  levels = tmp$ideology[order(tmp$cumulative_onsets)]
)

#Add total number of campaigns
ide_trends <- full_join(ide_trends, all_campaigns_trend)


#Plot cumulative
# Figure B7: cumulative onsets per ideology for the opposition subset.
# `label` and `end_value` are filled only on each ideology's last year, so
# geom_text() draws a single label per line at x = 2020.
#
# The Ethnicism and Conservativism labels are pushed apart by moving them 20
# units in opposite directions. The ideology is spelled "Conservativism" here
# because that is the display name assigned in the recode; the previous test
# for "Conservatism" never matched, and its replacement value was read from
# the Ethnicism row, so the Conservativism label was never moved.
tmp_plotdat <- ide_trends %>%
  mutate(label = ifelse(start_year == max(start_year), as.character(ideology), NA),
         end_value = ifelse(start_year == max(start_year), cumulative_onsets, NA)) %>%
  mutate(end_value = case_when(
    ideology == "Ethnicism"      ~ end_value + 20,
    ideology == "Conservativism" ~ end_value - 20,
    .default = end_value
  ))

ggplot(tmp_plotdat, aes(x = start_year, y = cumulative_onsets, color = ideology, group = ideology)) +
  geom_line(linewidth = 2) +
  scale_x_continuous(breaks = seq(1790, 2019, 20)) +
  scale_y_continuous(breaks = seq(0, 1700, 100)) +
  geom_text(aes(color = ideology, x = 2020,
                y = end_value, label = gsub(" and ", " and \n", label)), show.legend = FALSE,
            hjust = 0, size = 8, vjust = 0.5) +
  ylab("") + xlab("") +
  scale_color_manual(values = c25) +
  theme_minimal() +
  # Wide right margin plus clip = "off" leave room for the end-of-line labels.
  theme(plot.margin = unit(c(1, 12, 1, 1), "lines"),
        legend.position = "bottom",
        legend.title = element_blank(),
        axis.text = element_text(size = 22),
        legend.text = element_text(size = 22)) +
  coord_cartesian(expand = TRUE, clip = "off")
ggsave("Output/FigureB7.jpg", dpi = 350, width = 16, height = 9)


## 10 year periods ---------------------------------------------------------
# Trailing ten-year onset counts per ideology: lags 1-9 of `onsets` are added
# within each ideology and summed together with the current year.
# lag() follows row order, so this relies on the rows being sorted by
# start_year within ideology.
ide_trends_lag <- ide_trends %>%
  group_by(ideology) %>%
  mutate(
    onsets_lag1 = lag(onsets, 1),
    onsets_lag2 = lag(onsets, 2),
    onsets_lag3 = lag(onsets, 3),
    onsets_lag4 = lag(onsets, 4),
    onsets_lag5 = lag(onsets, 5),
    onsets_lag6 = lag(onsets, 6),
    onsets_lag7 = lag(onsets, 7),
    onsets_lag8 = lag(onsets, 8),
    onsets_lag9 = lag(onsets, 9)
  )

# Row-wise sum over `onsets` and all its lag columns; the missing lags at the
# start of each series are skipped by na.rm.
ide_trends_lag$onsets_10year <- rowSums(
  ide_trends_lag[, grep("^onsets", names(ide_trends_lag), value = TRUE)],
  na.rm = TRUE
)

# Copy of ide_trends_lag with label/end_value filled only on each ideology's
# last year.
end_values <- ide_trends_lag %>%
  mutate(
    label = ifelse(start_year == max(start_year), as.character(ideology), NA),
    end_value = ifelse(start_year == max(start_year), onsets_10year, NA)
  )


# Figure B8: trailing ten-year onset counts per ideology. `label` and
# `end_value` are filled only on each ideology's last year; the
# Socialism/Marxism label is then moved up by 2 and the Republicanism label
# down by 2 (rows without an end value stay NA).
tmp_plotdat <- ide_trends_lag %>%
  mutate(
    label = ifelse(start_year == max(start_year), as.character(ideology), NA),
    end_value = ifelse(start_year == max(start_year), onsets_10year, NA)
  ) %>%
  mutate(end_value = case_when(
    ideology == "Socialism/Marxism" ~ end_value + 2,
    ideology == "Republicanism"     ~ end_value - 2,
    .default = end_value
  ))

tmp_plotdat %>%
  ggplot(aes(x = start_year, y = onsets_10year, colour = ideology)) +
  geom_line(linewidth = 2) +
  geom_text(
    aes(colour = ideology, x = 2020, y = end_value,
        label = gsub(" and ", " and \n", label)),
    size = 8, hjust = 0, vjust = 0.5, show.legend = FALSE
  ) +
  scale_x_continuous(breaks = seq(1790, 2020, 30)) +
  scale_y_continuous(breaks = seq(0, 200, 10)) +
  scale_colour_manual(values = c25) +
  labs(x = "", y = "") +
  theme_minimal() +
  # Wide right margin plus clip = "off" leave room for the end-of-line labels.
  theme(
    plot.margin = unit(c(1, 12, 1, 1), "lines"),
    axis.text = element_text(size = 22),
    legend.text = element_text(size = 22),
    legend.title = element_blank(),
    legend.position = "bottom"
  ) +
  coord_cartesian(expand = TRUE, clip = "off")
ggsave(filename = "Output/FigureB8.jpg", width = 18, height = 10, dpi = 350)


# Country timelines -------------------------------------------------------
# Two balanced country-year grids, split on omg_coverage$coverage_start:
# countries with coverage_start >= 1899 get the years 1899-2019, all others
# get 1787-2019. Country names are attached from omg_coverage.
country_names <- unique(omg_coverage[, c("country_name", "country_id")])

timeline_coverage_start1900 <- subset(omg_coverage, coverage_start >= 1899)
timeline_coverage_start1900 <- expand.grid(
  "country_id" = unique(timeline_coverage_start1900$country_id),
  "year" = 1899:2019
)
timeline_coverage_start1900 <- left_join(timeline_coverage_start1900, country_names)

timeline_coverage_start1789 <- subset(omg_coverage, coverage_start < 1899)
timeline_coverage_start1789 <- expand.grid(
  "country_id" = unique(timeline_coverage_start1789$country_id),
  "year" = 1787:2019
)
timeline_coverage_start1789 <- left_join(timeline_coverage_start1789, country_names)

# One row per country and campaign start year, flagged as an onset.
onsets <- unique(omg[, c("country_id", "start_year")]) %>%
  mutate(Onset = "Yes", year = start_year) %>%
  select(-start_year)

# Country-years without a matching onset row are marked "No".
timeline_coverage_start1900 <- left_join(timeline_coverage_start1900, onsets)
timeline_coverage_start1900$Onset[is.na(timeline_coverage_start1900$Onset)] <- "No"
timeline_coverage_start1789 <- left_join(timeline_coverage_start1789, onsets)
timeline_coverage_start1789$Onset[is.na(timeline_coverage_start1789$Onset)] <- "No"

#Non-coded years
# Country-years that do not occur in the V-Dem data are relabelled as
# "Country\nnon-existent"; matching is done on a "<country_id>_<year>" key.
vdem <- vdemdata::vdem
vdem$country_year <- paste(vdem$country_id, vdem$year, sep = "_")
timeline_coverage_start1789$country_year <- paste(
  timeline_coverage_start1789$country_id, timeline_coverage_start1789$year, sep = "_"
)
timeline_coverage_start1900$country_year <- paste(
  timeline_coverage_start1900$country_id, timeline_coverage_start1900$year, sep = "_"
)
timeline_coverage_start1789$Onset <- ifelse(
  timeline_coverage_start1789$country_year %in% vdem$country_year,
  timeline_coverage_start1789$Onset,
  "Country\nnon-existent"
)
timeline_coverage_start1900$Onset <- ifelse(
  timeline_coverage_start1900$country_year %in% vdem$country_year,
  timeline_coverage_start1900$Onset,
  "Country\nnon-existent"
)

# Fixed level order so that the manual fill colours in the plots line up.
onset_levels <- c("Country\nnon-existent", "No", "Yes")
timeline_coverage_start1900$Onset <- factor(timeline_coverage_start1900$Onset, levels = onset_levels)
timeline_coverage_start1789$Onset <- factor(timeline_coverage_start1789$Onset, levels = onset_levels)

# Figure B2: country-by-year raster for the 1899-2019 grid. Fill colours follow
# the Onset factor levels (grey = country non-existent, white = no onset,
# black = onset). Countries are listed alphabetically from the top and
# separated by thin horizontal rules drawn between the rows.
p_coverage1900 <- timeline_coverage_start1900 %>%
  ggplot(aes(x = year, y = country_name, fill = Onset)) +
  geom_raster() +
  geom_hline(
    yintercept = seq(1, length(unique(timeline_coverage_start1900$country_name)), 1) + 0.5,
    colour = "grey80"
  ) +
  scale_x_continuous(limits = c(1898, 2020), breaks = c(seq(1899, 2019, 25), 2019)) +
  scale_y_discrete(limits = rev(sort(unique(timeline_coverage_start1900$country_name)))) +
  scale_fill_manual(values = c("grey80", "white", "black")) +
  labs(x = "Year", y = "") +
  theme_minimal() +
  theme(
    axis.text.x = element_text(size = 13),
    axis.text.y = element_text(size = 13),
    axis.title = element_text(size = 14),
    legend.title = element_text(size = 14),
    legend.text = element_text(size = 14),
    legend.position = "right"
  )
print(p_coverage1900)
ggsave(filename = "Output/FigureB2.jpg", plot = p_coverage1900, width = 10, height = 12, dpi = 350)


# Figure B1: country-by-year raster for the 1787-2019 grid. Fill colours follow
# the Onset factor levels (grey = country non-existent, white = no onset,
# black = onset). Countries are listed alphabetically from the top and
# separated by thin horizontal rules drawn between the rows.
p_coverage1789 <- timeline_coverage_start1789 %>%
  ggplot(aes(x = year, y = country_name, fill = Onset)) +
  geom_raster() +
  geom_hline(
    yintercept = seq(1, length(unique(timeline_coverage_start1789$country_name)), 1) + 0.5,
    colour = "grey80"
  ) +
  scale_x_continuous(
    limits = c(1786, 2022),
    breaks = c(1787, seq(1800, 2019, 25), 2019),
    expand = c(0, 1)
  ) +
  scale_y_discrete(limits = rev(sort(unique(timeline_coverage_start1789$country_name)))) +
  scale_fill_manual(values = c("grey80", "white", "black")) +
  labs(x = "Year", y = "") +
  theme_minimal() +
  theme(
    axis.text.x = element_text(size = 13),
    axis.text.y = element_text(size = 13),
    axis.title = element_text(size = 14),
    legend.title = element_text(size = 14),
    legend.text = element_text(size = 14),
    legend.position = "right"
  )
ggsave(filename = "Output/FigureB1.jpg", plot = p_coverage1789, width = 12, height = 12, dpi = 350)


# Comparison with NAVCO -----------------------------------------------------
# OMG side: country-years with at least one movement, lower-cased country
# names, every name containing "palestine" collapsed to "palestine", and the
# movement counts summed per country-year.
cy_omg <- read_rds("Data/TheOMGDataset/R/cy_omg.rds")
comp_omg <- cy_omg[which(cy_omg$count_movements > 0), c("country_name", "year", "count_movements")]
comp_omg$country_name <- tolower(comp_omg$country_name)
comp_omg$country_name[grep("palestine", comp_omg$country_name)] <- "palestine"
comp_omg <- comp_omg |> group_by(country_name, year) |> summarise("omg" = sum(count_movements))


# NAVCO side: the country name is taken from `target_country`. (An earlier
# assignment from `location` was overwritten on the very next line and never
# took effect, so it has been removed.)
NAVCO2_1_ForPublication_1_ <- read_dta("Data/NAVCO/NAVCO21/NAVCO2-1_ForPublication.dta")
navco_21_cy_n <- NAVCO2_1_ForPublication_1_
navco_21_cy_n$country_name <- tolower(navco_21_cy_n$target_country)

# Harmonise NAVCO country names with the names used on the OMG side.
# Lookup: NAVCO name (lower case) -> name to compare on. No replacement name
# is itself a key, so a single lookup pass gives the same result as applying
# the recodes one after another.
navco_name_recodes <- c(
  "uk"                           = "united kingdom",
  "ussr"                         = "russia",
  "bosnia-herzegovina"           = "bosnia and herzegovina",
  "cambodia (kampuchea)"         = "cambodia",
  "cote d'ivoire"                = "ivory coast",
  "czechoslovakia"               = "czech republic",
  "democratic republic of congo" = "democratic republic of the congo",
  "congo"                        = "republic of the congo",
  "east germany"                 = "german democratic republic",
  "hyderabad"                    = "india",
  "macedonia"                    = "north macedonia",
  "myanmar"                      = "burma/myanmar",
  "myanmar/burma"                = "burma/myanmar",
  "princely state of travencore" = "india",
  "south vietnam"                = "republic of vietnam",
  "yemen people's republic"      = "south yemen",
  "yemen arab republic"          = "yemen",
  "yemen (north yemen)"          = "yemen",
  "yugoslavia"                   = "serbia",
  "palestinian territories"      = "palestine"
)
needs_recode <- navco_21_cy_n$country_name %in% names(navco_name_recodes)
navco_21_cy_n$country_name[needs_recode] <-
  unname(navco_name_recodes[navco_21_cy_n$country_name[needs_recode]])

# Keep the campaign-level rows before collapsing to the number of distinct
# campaign ids per country-year.
navco_backup <- navco_21_cy_n
navco_21_cy_n <- navco_21_cy_n |> group_by(country_name, year) |> summarise(navco = length(unique(id)))

# Country-year comparison of campaign counts in OMG and NAVCO 2.1.
comp <- full_join(comp_omg, navco_21_cy_n)

# A country is "missing" unless it occurs in both sources.
comp$missing_country <- ifelse(
  comp$country_name %in% unique(comp_omg$country_name) &
    comp$country_name %in% unique(navco_21_cy_n$country_name),
  "Not missing country",
  "Missing country"
)

# A country-year absent from one source counts as zero campaigns there.
comp$omg[is.na(comp$omg)] <- 0
comp$navco[is.na(comp$navco)] <- 0

# Which source reports more campaigns in the country-year.
comp$more <- ifelse(
  comp$navco > comp$omg, "NAVCO",
  ifelse(
    comp$navco < comp$omg, "OMG",
    ifelse(comp$navco == comp$omg, "Equal", "What")
  )
)

# Figure D1: number of country-years (1945-2013, countries present in both
# sources) at each combination of OMG and NAVCO campaign counts, with the
# 45-degree line marking agreement.
omg_navco_correlation_text <- comp |>
  subset(year >= 1945 & year < 2014 & missing_country == "Not missing country") |>
  group_by(omg, navco) |>
  summarise("N" = n()) |>
  ggplot(aes(x = navco, y = omg, label = N)) +
  geom_abline(intercept = 0, slope = 1) +
  geom_label(fill = "white") +
  scale_x_continuous(breaks = seq(0, 20, 1)) +
  scale_y_continuous(breaks = seq(0, 20, 1)) +
  #scale_color_manual(values = c(  "tomato2", "black")) +
  labs(x = "Campaigns in NAVCO 2.1", y = "Campaigns in OMG") +
  theme_minimal() +
  theme(
    panel.grid.major = element_blank(),
    legend.position = "none"
  )
ggsave(
  filename = "Output/FigureD1.jpg", plot = omg_navco_correlation_text,
  width = 12, height = 10, scale = 0.7, dpi = 350
)


## Campaigns covered -------------------------------------------------------
# Number of distinct NAVCO campaign ids located in countries that also appear
# on the OMG side.
navco_in_omg <- subset(navco_backup, country_name %in% comp_omg$country_name)
navco_in_omg$id |> unique() |> length()

# Number of distinct OMG campaign ids from the "contemporary" team that end
# after 1945, start before 2014 and are located in one of those countries.
# The campaign file is reloaded from disk and its names are harmonised the
# same way as above.
omg <- read_rds("Data/TheOMGDataset/R/omg.rds")
omg$country_name <- tolower(omg$country_name)
tmp <- subset(omg, team == "contemporary")
tmp$country_name[grepl("palestine", tmp$country_name)] <- "palestine"
tmp <- subset(tmp, end_year > 1945 & start_year < 2014)
omg_in_navco <- subset(tmp, country_name %in% navco_in_omg$country_name)
omg_in_navco$id |> unique() |> length()



# Comparison with Revolutionary Episodes -----------------------------------------------------
# Turn the episode-level Revolutionary Episodes file into counts of episodes
# per country-year.
rev <- read_dta("Data/RevEpisodes/revolutionaryeps.dta")
rev$endyear[is.na(rev$endyear)]  # prints the missing end years, if any
# Missing end years are filled in as start year + duration.
rev$endyear <- ifelse(is.na(rev$endyear), rev$startyear + rev$yrsduration, rev$endyear)
rev$one <- 1
rev$row <- 1:nrow(rev)

# Repeat every episode once per year of duration, then number the copies
# 0, 1, 2, ... within the episode to obtain the calendar year.
rev_cy <- rev[rep(row.names(rev), rev$yrsduration), ]
rev_cy <- rev_cy |>
  group_by(row) |>
  mutate(
    cumulative_duration = cumsum(one) - 1,
    year = startyear + cumulative_duration
  )

# COW code -> V-Dem country id/name lookup, built from V-Dem rows after 1899.
library(vdemdata)
link <- vdemdata::vdem
link <- unique(link[which(link$year > 1899), c("country_name", "country_id", "COWcode", "year")])
link <- na.omit(link)
link_tmp <- link
link <- unique(link[, c("COWcode", "country_id", "country_name")])

# Code used for the lookup: the episode's own code when no metropole code is
# given; otherwise the metropole code when it is 365 before 1990 or when the
# episode's own code has four characters; otherwise the episode's own code.
rev_cy$COWcode <- ifelse(
  is.na(rev_cy$cowmetrocode),
  rev_cy$cowcode,
  ifelse(
    rev_cy$cowmetrocode == 365 & rev_cy$year < 1990,
    rev_cy$cowmetrocode,
    ifelse(nchar(rev_cy$cowcode) == 4, rev_cy$cowmetrocode, rev_cy$cowcode)
  )
) #USSR
rev_cy <- left_join(rev_cy, link)
rev_cy$country_name[grepl("palestine|west bank|gaza", tolower(rev_cy$location))] <- "palestine"
rev_cy_tmp <- rev_cy

# Episodes per country-year, with lower-cased country names.
rev_cy <- rev_cy |>
  group_by(country_name, year) |>
  summarise(rev = n())
rev_cy$country_name <- tolower(rev_cy$country_name)
rev_cy$country_id <- NULL


# OMG side of the comparison: country-years with at least one movement,
# lower-cased country names, every name containing "palestine" collapsed to
# "palestine", and the movement counts summed per country-year.
cy_omg <- read_rds("Data/TheOMGDataset/R/cy_omg.rds")
comp_omg <- cy_omg[which(cy_omg$count_movements > 0), c("country_name", "year", "count_movements")]
comp_omg$country_name <- tolower(comp_omg$country_name)
comp_omg$country_name[grep("palestine", comp_omg$country_name)] <- "palestine"
comp_omg <- comp_omg |> group_by(country_name, year) |> summarise("omg" = sum(count_movements))


# Country-year comparison with the Revolutionary Episodes counts. A country is
# "missing" when it does not occur in both sources.
comp <- full_join(comp_omg, rev_cy)
comp$missing_country <- ifelse(!comp$country_name %in% unique(comp_omg$country_name), "Missing country",
                               ifelse(!comp$country_name %in% unique(rev_cy$country_name), "Missing country", "Not missing country"))

# A country-year absent from one source counts as zero campaigns there.
comp$omg[is.na(comp$omg)] <- 0
comp$rev[is.na(comp$rev)] <- 0
comp$more <- ifelse(comp$rev > comp$omg, "RE",
                    ifelse(comp$rev < comp$omg, "OMG",
                           ifelse(comp$rev == comp$omg, "Equal", "What")))
prop.table(table(comp$more))

# Number of distinct countries in the compared sample. `comp` inherits the
# country_name grouping left behind by the summarise() that built comp_omg, so
# without ungroup() this would return one row per country (each equal to 1)
# instead of a single count.
comp |>
  subset(year >= 1899 & year < 2014 & missing_country == "Not missing country") |>
  ungroup() |>
  summarise(n_countries = n_distinct(country_name))


# Figure D2: number of country-years (1899-2013, countries present in both
# sources) at each combination of OMG and Revolutionary Episodes campaign
# counts, with the 45-degree line marking agreement.
omg_rev_correlation_text <- comp |>
  subset(year >= 1899 & year < 2014 & missing_country == "Not missing country") |>
  group_by(omg, rev) |>
  summarise("N" = n()) |>
  ggplot(aes(x = rev, y = omg, label = N)) +
  geom_abline(intercept = 0, slope = 1) +
  geom_label(fill = "white") +
  scale_x_continuous(breaks = seq(0, 20, 1)) +
  scale_y_continuous(breaks = seq(0, 20, 1)) +
  labs(x = "Campaigns in Revolutionary Episodes", y = "Campaigns in OMG") +
  theme_minimal() +
  theme(
    panel.grid.major = element_blank(),
    legend.position = "none"
  )
ggsave(
  filename = "Output/FigureD2.jpg", plot = omg_rev_correlation_text,
  width = 12, height = 10, scale = 0.7, dpi = 350
)



## Campaigns covered -------------------------------------------------------
# Episode-level overlap between Revolutionary Episodes and OMG: the episode
# file is reloaded and matched to V-Dem country names without expanding it to
# country-years.
rev <- read_dta("Data/RevEpisodes/revolutionaryeps.dta")
rev$endyear[is.na(rev$endyear)]  # prints the missing end years, if any
# Missing end years are filled in as start year + duration.
rev$endyear <- ifelse(is.na(rev$endyear), rev$startyear + rev$yrsduration, rev$endyear)

# COW code -> V-Dem country id/name lookup, built from V-Dem rows after 1899.
library(vdemdata)
link <- vdemdata::vdem
link <- unique(link[which(link$year > 1899), c("country_name", "country_id", "COWcode", "year")])
link <- na.omit(link)
link_tmp <- link
link <- unique(link[, c("COWcode", "country_id", "country_name")])

# Code used for the lookup: the episode's own code when no metropole code is
# given; otherwise the metropole code when it is 365 and the episode starts
# before 1990 or when the episode's own code has four characters; otherwise
# the episode's own code.
rev$COWcode <- ifelse(
  is.na(rev$cowmetrocode),
  rev$cowcode,
  ifelse(
    rev$cowmetrocode == 365 & rev$startyear < 1990,
    rev$cowmetrocode,
    ifelse(nchar(rev$cowcode) == 4, rev$cowmetrocode, rev$cowcode)
  )
) #USSR
rev <- left_join(rev, link)
#rev$country_name[grep("palestine|west bank|gaza", tolower(rev_cy$location))] <- "palestine"
rev_tmp <- rev
rev$country_name <- tolower(rev$country_name)


# OMG campaigns from the "contemporary" team, with harmonised country names.
omg <- read_rds("Data/TheOMGDataset/R/omg.rds")
omg$country_name <- tolower(omg$country_name)
omg <- subset(omg, team == "contemporary")
omg$country_name[grepl("palestine", omg$country_name)] <- "palestine"

# Episodes in countries that OMG covers, OMG campaigns in countries that
# Revolutionary Episodes covers, and the number of distinct OMG ids among
# the latter.
rev_in_omg <- subset(rev, country_name %in% omg$country_name)
omg_in_rev <- subset(omg, country_name %in% rev$country_name)
omg_in_rev$id |> unique() |> length()


# Count words in documents ----------------------------------------------------------
# Total number of words across the docx files found in the sub-directories of
# Data/CountryDocs_OMG.
folders <- list.dirs("Data/CountryDocs_OMG", full.names = TRUE)
# list.dirs() lists the root directory itself first; keep only what is below
# it. Negative indexing also copes with a root that has no sub-directories
# (2:length(folders) would count backwards in that case).
folders <- folders[-1]
filelist <- unlist(lapply(folders, function(folder)
  grep("docx", list.files(folder, full.names = TRUE), value = TRUE)
))


# Read every document once and count words on the text already in memory
# (the documents were previously read from disk a second time for counting).
texts <- lapply(filelist, function(path)
  readtext(path)$text
)

textlist <- lapply(texts, stri_count_words)

sum(unlist(textlist))

